
# ==============================================================================
# DESCRIPTION ----
# ==============================================================================

# This code combines all loan data from Nikkei NEEDS into a single time 
# series dataset
# NOTE: NIKKEI NEEDS DATA CANNOT BE PROVIDED FOR LEGAL REASONS.

# ==============================================================================
# LIBRARIES AND IMPORT ----
# ==============================================================================

library(tidyverse)
library(lubridate)

source("code/0.2_functions.R")

# ==============================================================================
# READ IN ALL DATA FILES, CLEAN, AND APPEND INTO ONE DATAFRAME ----
# ==============================================================================

#### Read in all loan files from directories ####
ld_public <- read_nikkei(path = "data/nikkei_needs/loans/listed_delisted_public", 
                 extension = "xlsx", filename = T)

jas_public <- read_nikkei(path = "data/nikkei_needs/loans/jasdaq_public", 
                         extension = "xlsx", filename = T)

ld_private <- read_nikkei(path = "data/nikkei_needs/loans/listed_delisted_private", 
                         extension = "xlsx", filename = T)

jas_private <- read_nikkei(path = "data/nikkei_needs/loans/jasdaq_private", 
                          extension = "xlsx", filename = T)

# Remove private loan data from jasdaq public (error in Nikkei NEEDS query)
jas_public <- jas_public %>%
  filter(`Financial Institution Name` == "政府関係金融機関合計")

# Combine public and private loans data sets
loans_public <- bind_rows(ld_public, jas_public)
loans_private <- bind_rows(ld_private, jas_private)

#### Clean public loan data ####
loans_public_clean <- loans_public %>%
  
  # Rename variables
  rename( 
    nikkei_code = `...1`, company = `...2`, date = `...5`,
    debt_long_public = `Long-Term Debt`, debt_short_public = `Short-Term Debt`,
    debt_total_public = `Debt Total`
  ) %>%
  
  # Extract year and month from date variable
  mutate( 
    year = as.numeric(substr(date, start = 1, stop = 4)),
    month = as.numeric(substr(date, start = 6, stop = 7))
  ) %>%
  
  # loan data is always recorded in March, drop others
  #filter(month == 3) %>% 
  
  # Keep needed variables only
  select(nikkei_code, company, year, month, 
         debt_long_public, debt_short_public, debt_total_public) %>%
  
  # Convert loan variables to numeric and replace missing with zero
  mutate(
    debt_short_public = as.numeric(case_when(
      debt_short_public == "-" | is.na(debt_short_public) ~ "0", TRUE ~ debt_short_public)),
    debt_long_public = as.numeric(case_when(
      debt_long_public == "-" | is.na(debt_long_public) ~ "0", TRUE ~ debt_long_public)),
    debt_total_public = as.numeric(case_when(
      debt_total_public == "-" | is.na(debt_total_public) ~ "0", TRUE ~ debt_total_public)),
  ) %>%
  
  # Convert to yearly data (usual only one obs per year, but not always)
  group_by(nikkei_code, year) %>%
  summarise(debt_long_public = sum(debt_long_public),
            debt_short_public = sum(debt_short_public), 
            debt_total_public = sum(debt_total_public)) %>% 
  ungroup()

#write.csv(nikkei_loans, file = "data/govt_loans.csv")

#### Clean private loan data ####
loans_private_clean <- loans_private %>%
  
  # Rename variables
  rename( 
    nikkei_code = `...1`, company = `...2`, date = `...5`,
    debt_long_private = `Long-Term Debt`, debt_short_private = `Short-Term Debt`,
    debt_total_private = `Debt Total`
  ) %>%
  
  # Extract year and month from date variable
  mutate( 
    year = as.numeric(substr(date, start = 1, stop = 4)),
    month = as.numeric(substr(date, start = 6, stop = 7))
  ) %>%
  
  # Keep needed variables only
  select(nikkei_code, company, year, month, 
         debt_long_private, debt_short_private, debt_total_private) %>%
  
  # Convert loan variables to numeric and replace missing with zero
  mutate(
    debt_short_private = as.numeric(case_when(
      debt_short_private == "-" | is.na(debt_short_private) ~ "0", TRUE ~ debt_short_private)),
    debt_long_private = as.numeric(case_when(
      debt_long_private == "-" | is.na(debt_long_private) ~ "0", TRUE ~ debt_long_private)),
    debt_total_private = as.numeric(case_when(
      debt_total_private == "-" | is.na(debt_total_private) ~ "0", TRUE ~ debt_total_private)),
  ) %>%
  
  # Convert to yearly data (usual only one obs per year, but not always)
  group_by(nikkei_code, year) %>%
  summarise(debt_long_private = sum(debt_long_private),
            debt_short_private = sum(debt_short_private), 
            debt_total_private = sum(debt_total_private)) %>% 
  ungroup()

#### Combine into one dataframe ####
loans_clean <- left_join(loans_public_clean, loans_private_clean) %>%
  mutate_at(vars(debt_long_private:debt_total_private), ~replace(., is.na(.), 0))
  
# ==============================================================================
# MERGE LOAN DATA WITH COVARIATES ----
# ==============================================================================

# Import nikkei corporate data
load("data/nikkei_financials.RData")

# Merge covariate data onto loan data
loans_covs <- left_join(loans_clean, nikkei_time, by = c("nikkei_code", "year"))

# Merge loan data onto covariate data
covs_loans <- left_join(nikkei_time, loans_clean, by = c("nikkei_code", "year"))

# ==============================================================================
# FINALIZE DATA AND EXPORT ----
# ==============================================================================

# Add estimation variables to dataset
loans_covs <- loans_covs %>% 
  filter_at(vars(6:30), any_vars(!is.na(.))) # Drop firms with no attribute data (~ 2000)

####  Export #### 
save(loans_covs, file = "data/loans_amakudari.Rdata")
save(covs_loans, file = "data/nikkei_financials_loans.Rdata")

